#### Plot of Figure 2 - Figure Supplement 2
# platereader DATA 20190109

#growth of ancestral strain and H5 double promoter mutant compared in M9 gal and gly (ctr)
###### read in data #################################
# NOTE(review): absolute, machine-specific path; the three input files below
# are resolved relative to it, so the script only runs where this folder exists.
setwd("/Users/itomanek/Documents/promoter_evolution/experiments/platereader_data/2019_January/improving_DOG_gal_selection")

# OD600
# read.csv2() uses "," as the decimal mark (original author's note: needed for
# OD files exported on German-locale computers).
OD <- read.csv2("20190109_H5r_DOGr_IT028_OD.txt", header = TRUE, sep = "\t")
# Original author's note, kept as-is because its meaning cannot be confirmed
# from this script: 'need to put "Temperature" into the field for T?? Read 600 in excel'
head(OD)
# YFP
YFP <- read.csv2("20190109_H5r_DOGr_IT028_YFP.txt", header = TRUE, sep = "\t")
# PLATE INFO (metadata)
plate_info <- read.csv2("20190109_plate_layout.txt", header = TRUE, sep = "\t")

###### transform time (00:00:00 --> hours) ###########
# The first OD column holds the read time as "HH:MM:SS" text. Only the hour and
# minute fields are used; a seconds field, if present, is ignored.
time <- as.character(OD[, 1])
time <- vapply(
  strsplit(time, ":"),
  function(parts) {
    parts <- as.numeric(parts)
    parts[1] + parts[2] / 60
  },
  numeric(1)
)
time <- round(time, 2)
time # decimal hours, rounded to 2 digits

# Both tables get the OD-derived time axis, so they must have one row per read
# in the same order.
if (nrow(YFP) != nrow(OD)) {
  stop("OD and YFP tables have a different number of reads", call. = FALSE)
}
OD$Time <- time # replace the old time format with time in hours
YFP$Time <- time

###### combine data #################################
library("dplyr")
library(reshape2)
library("viridis")           
# Close the current graphics device before previewing the palette, but only if
# one is open: a bare dev.off() errors when just the null device exists.
if (!is.null(dev.list())) {
  dev.off()
}
barplot(1:10, col = viridis(10)) # preview of the viridis palette
### rearrange the data so that each row contains the value for one read of one well
reshaped <- melt(OD, id.vars = "Time", variable.name = "Well", value.name = "OD600")
yfp_reshaped <- melt(YFP, id.vars = "Time", variable.name = "Well", value.name = "YFP")

# Attach the plate layout to every OD read (wells missing from plate_info are
# dropped by the inner join), then add the YFP read of the same well and time.
annotated <- inner_join(reshaped, plate_info, by = c("Well"), copy = TRUE)
annotated <- inner_join(annotated, yfp_reshaped, by = c("Well", "Time"), copy = TRUE)
head(annotated)

###### Group & summarize data #################################

grouped <- group_by(annotated, Time, Well, strain, sugar, concentration)
# transform() adds the OD-normalised fluorescence column.
# NOTE(review): transform() rebuilds its result with data.frame(), so the dplyr
# grouping above is presumably not carried forward - confirm before relying on it.
grouped <- transform(grouped, nYFP = YFP / OD600)
unique(grouped$strain)

# ddply()/summarise() are called with an explicit plyr:: prefix because plyr is
# not attached at this point of the script (it only arrives later as a
# dependency of Rmisc), so the unqualified calls fail in a fresh session.

#  mean OD600 per time point and condition (sem = sd / sqrt(n))
sum_OD <- plyr::ddply(
  grouped, c("Time", "strain", "sugar", "concentration"), plyr::summarise,
  mean = mean(OD600), sd = sd(OD600), sem = sd(OD600) / sqrt(length(OD600))
)
sum_OD
##  mean normalized YFP per time point and condition
sum_nYFP <- plyr::ddply(
  grouped, c("Time", "strain", "sugar", "concentration"), plyr::summarise,
  mean = mean(nYFP), sd = sd(nYFP), sem = sd(nYFP) / sqrt(length(nYFP))
)

head(sum_OD)
###### PLOTS ######################################
#install.packages("ggplot2")
library(ggplot2)
#install.packages("Hmisc")
library("Hmisc")
library("Rmisc")
greys <- c("black", "#4D4D4D", "#888888", "#AEAEAE", "#CCCCCC")
Ara_Palette <- c("#E0ECF4", "#BFD3E6", "#9EBCDA", "#8C96C6", "#8C6BB1", "#88419D", "#810F7C", "#4D004B")
pie(rep(1, 8), col = Ara_Palette) # preview of the palette

##################### ## ###########################
# Shared empty base plot: white background, no grid lines, larger text.
# Layers are added to it below and in later sections of the script.
plot <- ggplot() + theme_bw() + # no grey background
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(), text = element_text(size = 15))

# growth
plot + geom_line(
  data = subset(grouped, strain == "H5"),
  aes(x = Time, y = OD600, group = Well, color = as.factor(sugar), linetype = as.factor(concentration))
)

# plot showing that H5r is adaptive over ancestor
# %in% is required to select both strains: `strain == c("H5", "IT028")` would
# recycle the two-element vector along the rows and compare each row against
# only one of the two names, silently dropping rows.
plot + geom_line(
  data = subset(grouped, strain %in% c("H5", "IT028") & sugar == "gal" & concentration == "0.1%"),
  aes(x = Time, y = OD600, group = Well, color = as.factor(strain), linetype = as.factor(concentration))
)
plot + geom_line(
  data = subset(grouped, strain %in% c("H5", "IT028") & sugar == "gly"),
  aes(x = Time, y = OD600, group = Well, color = as.factor(strain), linetype = as.factor(concentration))
)

# growth in different wells of the same thing
plot + geom_line(
  data = subset(grouped, strain == "H5-galP-" & concentration == "0.001%" & sugar == "DOG"),
  aes(x = Time, y = OD600, group = Well, color = (strain), linetype = as.factor(concentration))
)








##### #Growth Rates#####################################
# Sliding window: fit a linear model of log(OD600) vs. time to every window of reads,
## then keep the intercept + slope of the window with the steepest slope (max. growth rate)
########################################################

#### FUNCTION TO CALCULATE GR
library("zoo")

#' Estimate the maximal exponential growth rate of each well.
#'
#' For every well, a straight line is fitted to log(OD) vs. time in each
#' window of `width` consecutive reads (the window advances one read at a
#' time). The intercept and slope of the window with the largest slope are
#' kept for that well.
#'
#' @param OD Data frame with one row per read and one column per well; wells
#'   are looked up by column name.
#' @param wells Character vector of well names to analyse. Defaults to the
#'   wells present in the global `grouped` table, as in the original script.
#' @param time_h Numeric read times, one per row of `OD`. Defaults to the
#'   global `time` vector, as in the original script.
#' @param width Number of consecutive reads per fitting window.
#' @return Data frame with one row per well and columns `Well` (character),
#'   `GR_i` (intercept of the steepest fit) and `GR_s` (its slope).
find_GR <- function(OD,
                    wells = as.character(unique(grouped$Well)),
                    time_h = time,
                    width = 20) {
  if (length(wells) == 0) {
    return(data.frame(
      Well = character(0), GR_i = numeric(0), GR_s = numeric(0),
      stringsAsFactors = FALSE
    ))
  }

  # Look wells up by name rather than by position: indexing OD by position
  # attaches the wrong curve to a well whenever `wells` is a subset of, or
  # ordered differently from, the columns of OD.
  unknown_wells <- setdiff(wells, names(OD))
  if (length(unknown_wells) > 0) {
    stop("wells not found as columns of OD: ",
      paste(unknown_wells, collapse = ", "),
      call. = FALSE
    )
  }
  if (!is.numeric(time_h) || length(time_h) != nrow(OD)) {
    stop("time_h must be a numeric vector with one value per row of OD",
      call. = FALSE
    )
  }
  n_windows <- nrow(OD) - width + 1
  if (n_windows < 1) {
    stop("OD has fewer reads than the window width", call. = FALSE)
  }
  window_starts <- seq_len(n_windows)

  # Returns c(intercept, slope) of the steepest window for one well.
  steepest_fit <- function(well) {
    log_od <- log(OD[[well]])
    # 2 x n_windows matrix: row 1 = intercepts, row 2 = slopes
    coefs <- vapply(
      window_starts,
      function(first) {
        rows <- first:(first + width - 1)
        unname(coef(lm(log_od[rows] ~ time_h[rows])))
      },
      numeric(2)
    )
    coefs[, which.max(coefs[2, ])]
  }

  fits <- vapply(wells, steepest_fit, numeric(2), USE.NAMES = FALSE)
  data.frame(
    Well = wells, GR_i = fits[1, ], GR_s = fits[2, ],
    stringsAsFactors = FALSE
  )
} # function end

## Rebuild `grouped` from `annotated` so this section can be rerun on its own
## without stacking growth-rate columns from an earlier run.
grouped <- transform(
  group_by(annotated, Time, Well, strain, sugar, concentration),
  nYFP = YFP / OD600
)

# Fit the growth rates for this plate and attach them to every read of the
# matching well.
max_lm_df <- find_GR(OD)
grouped <- inner_join(grouped, max_lm_df, by = "Well")

## Visual check 1: log(OD) of well A1 with its fitted line drawn on top
plot(OD$Time, log(OD$A1))
with(subset(grouped, Well == "A1"), abline(GR_i, GR_s))

## Visual check 2: H5-galP- curves in galactose, coloured by fitted slope
ggplot(
  subset(grouped, sugar == "gal" & strain == "H5-galP-"),
  aes(x = Time, y = OD600, group = Well)
) +
  geom_line(aes(color = GR_s))

## Visual check 3: the same curves, coloured by well
ggplot(
  subset(grouped, sugar == "gal" & strain == "H5-galP-"),
  aes(x = Time, y = OD600, group = Well)
) +
  geom_line(aes(color = Well))


## Mean, sd and sem of the fitted slopes per time point and condition
sum_GR <- ddply(
  grouped,
  c("Time", "strain", "sugar", "concentration"),
  summarise,
  mean = mean(GR_s),
  sd = sd(GR_s),
  sem = sd(GR_s) / sqrt(length(GR_s))
)
head(sum_GR)

# Plot growth rates (GR_s) #################

### plot for Figure 2 - Figure Supplement 2
# Builds one panel: mean growth rate vs. concentration for a single sugar,
# one line per strain with +/- sem error bars, leaving out strain H5-galP-.
plot_mean_GR <- function(sugar_name, title_text) {
  gr_subset <- subset(sum_GR, sugar == sugar_name & strain != "H5-galP-")
  plot +
    geom_line(
      data = gr_subset,
      aes(x = concentration, y = mean, group = strain, color = (strain))
    ) +
    geom_errorbar(
      data = gr_subset,
      aes(x = concentration, y = mean, ymin = mean - sem, ymax = mean + sem, color = strain),
      size = 0.5, width = 0.5, alpha = 0.8
    ) +
    ggtitle(title_text) +
    ylim(c(0, 0.32))
}

a <- plot_mean_GR("gal", "growth rates in galactose")
b <- plot_mean_GR("gly", "growth rates in glycerol")

# Both panels side by side
multiplot(a, b, cols = 2)
